---
title: "Muris (2019), Section D.1, Design 2: DPD ADL(2) with six periods"
output:
  html_document:
    df_print: paged
---

# Setup

```{r}
library(AER)
library(tidyverse)
library(cowplot)
library(gmm)
library(plm)
```

# Data generation

A description of the code in this section is in `_design_1.Rmd`.

```{r}
DGP_dynamic_i <- function(i, T = 4, burn_in = 1,
                          rho = 0.9, beta1 = 1, beta2 = 0.0, sigma_a = 1, sigma_u = 0.1,
                          gamma0 = 1, gamma1 = 0.1, gamma2 = 0.4, tau = 1,sigma_v = 0.1){
  
  #############################################
  ### Explanation of the parameter values #####
  #############################################
  #
  ## general params
  # i is the personal identifier.
  # T is the number of time periods to export.
  # burn_in is a number of periods to get the process started (will be discarded)
  #
  ## outcome equation
  # rho <- 0.5 # coefficient on lagged Y
  # beta1 coefficient on xit
  # beta2 coefficient on xi,t-1
  # sigma_a is the variance of the unobserved heterogeneity
  # sigma_u is variance of error term in the outcomes equation
  #
  ## equation for x's
  # gamma0 is the constant term in this equation
  # gamma1 is the coefficient on first lag
  # gamma2 is the coefficient on the second lag
  # tau is the coefficient on alpha_i 
  # sigma_v variance of error terms in the equation for x's
  
  # Draw the parameter governing unobserved heterogeneity.
  alpha_i <- sigma_a*rnorm(1)
  # Starting value for the dependent variable
  Xi0 <- Ximin1 <- Yi0 <-  0
  # Set up the container.
  Y_i <- X_i <- numeric(burn_in+T)
  
  # Compute the first two periods: X
  X_i[1] <- gamma0  + gamma1*Xi0    + gamma2*Ximin1 + tau*alpha_i   + sigma_v*rnorm(1)
  X_i[2] <- gamma0  + gamma1*X_i[1] + gamma2*Xi0    + tau*alpha_i   + sigma_v*rnorm(1)
  # First two periods, T
  Y_i[1] <- alpha_i + rho*Yi0       + beta1*X_i[1] + beta2*Xi0     + sigma_u*rnorm(1)
  Y_i[2] <- alpha_i + rho*Y_i[1]    + beta1*X_i[2] + beta2*X_i[1]  + sigma_u*rnorm(1)
  
  # Remaining periods
  for(t in 3:(burn_in+T)){
    X_i[t] <- gamma0  + gamma1*X_i[t-1] + gamma2*X_i[t-2]  + tau*alpha_i     + sigma_v*rnorm(1)
    Y_i[t] <- alpha_i + rho*Y_i[t-1]    + beta1*X_i[t]     + beta2*X_i[t-1]  + sigma_u*rnorm(1)
  }
  # Return the final T periods
  export_period <- (burn_in+1):(burn_in+T)
  Y_i <- Y_i[export_period]
  X_i <- X_i[export_period]
  t <- 1:T
  return(as_tibble(cbind(Y_i,X_i,t,i)))
}
```

Try generating some data from this series:

```{r}
DGP_dynamic_i(4)
```

# Sets of moment conditions.

The moment conditions here differ from those in `_design_1.Rmd` because there are now six periods available. This leads to additional moment conditions. Furthermore, additional estimators are now available, like the complete case estimator. This section defines moment functions, one for each estimator.

First, the moment function `psi` for the infeasible estimator that uses the complete data.

```{r}
psi <- function(theta,Z){
  
  n <- nrow(Z)
  
  # Load the parameters
  rho = theta[1]
  beta1 = theta[2]
  beta2 = theta[3]
  
  # Compute the differenced residuals
  du6 = Z[,"dY6"] - rho*Z[,"dY5"] - beta1*Z[,"dX6"] - beta2*Z[,"dX5"]
  du5 = Z[,"dY5"] - rho*Z[,"dY4"] - beta1*Z[,"dX5"] - beta2*Z[,"dX4"]
  du4 = Z[,"dY4"] - rho*Z[,"dY3"] - beta1*Z[,"dX4"] - beta2*Z[,"dX3"]
  
  return( 
    cbind(
      
      # Sixth period!
      Z[,"Y3"] * du6,
      Z[,"Y2"] * du6,
      Z[,"Y1"] * du6,
      Z[,"X3"] * du6,
      Z[,"X2"] * du6,
      Z[,"X1"] * du6,
      # Fifth period
      Z[,"Y2"] * du5,
      Z[,"Y1"] * du5,
      Z[,"X2"] * du5,
      Z[,"X1"] * du5,
      # 
      Z[,"Y1"] * du4,
      Z[,"X1"] * du4
    )
  )  
}
```

The feasible efficient estimator uses only the incomplete data. The corresponding moment function `psiJ` is defined in the following chunk.

```{r}
psiJ <- function(theta,Z){
  
  n <- nrow(Z)
  
  # Load the parameters
  rho = theta[1]
  beta1 = theta[2]
  beta2 = theta[3]
  
  # Compute the differenced residuals
  du6 = Z[,"dY6"] - rho*Z[,"dY5"] - beta1*Z[,"dX6"] - beta2*Z[,"dX5"]
  du5 = Z[,"dY5"] - rho*Z[,"dY4"] - beta1*Z[,"dX5"] - beta2*Z[,"dX4"]
  du4 = Z[,"dY4"] - rho*Z[,"dY3"] - beta1*Z[,"dX4"] - beta2*Z[,"dX3"]
  
  return( 
    cbind(
      ## FOR COHORT == 0: FULL INCOMPLETE DATA
      ifelse(Z[,"cohort"] == 0,1,0) * Z[,"Y3"] * du6,
      ifelse(Z[,"cohort"] == 0,1,0) * Z[,"Y2"] * du6,
      ifelse(Z[,"cohort"] == 0,1,0) * Z[,"Y1"] * du6,
      ifelse(Z[,"cohort"] == 0,1,0) * Z[,"X3"] * du6,
      ifelse(Z[,"cohort"] == 0,1,0) * Z[,"X2"] * du6,
      ifelse(Z[,"cohort"] == 0,1,0) * Z[,"X1"] * du6,
      
      ifelse(Z[,"cohort"] == 0,1,0) * Z[,"Y2"] * du5,
      ifelse(Z[,"cohort"] == 0,1,0) * Z[,"Y1"] * du5,
      ifelse(Z[,"cohort"] == 0,1,0) * Z[,"X2"] * du5,
      ifelse(Z[,"cohort"] == 0,1,0) * Z[,"X1"] * du5,
      
      ifelse(Z[,"cohort"] == 0,1,0) * Z[,"Y1"] * du4,
      ifelse(Z[,"cohort"] == 0,1,0) * Z[,"X1"] * du4,
      
      ## FOR COHORT == 1: MISS PERIOD 6
      ifelse(Z[,"cohort"] == 1,1,0) * Z[,"Y2"] * du5,
      ifelse(Z[,"cohort"] == 1,1,0) * Z[,"Y1"] * du5,
      ifelse(Z[,"cohort"] == 1,1,0) * Z[,"X2"] * du5,
      ifelse(Z[,"cohort"] == 1,1,0) * Z[,"X1"] * du5,
      
      ifelse(Z[,"cohort"] == 1,1,0) * Z[,"Y1"] * du4,
      ifelse(Z[,"cohort"] == 1,1,0) * Z[,"X1"] * du4,
      
      ## FOR COHORT == 2: MISS PERIOD 1
      ifelse(Z[,"cohort"] == 2,1,0) * Z[,"Y3"] * du6,
      ifelse(Z[,"cohort"] == 2,1,0) * Z[,"Y2"] * du6,
      ifelse(Z[,"cohort"] == 2,1,0) * Z[,"X3"] * du6,
      ifelse(Z[,"cohort"] == 2,1,0) * Z[,"X2"] * du6,
      
      ifelse(Z[,"cohort"] == 2,1,0) * Z[,"Y2"] * du5,
      ifelse(Z[,"cohort"] == 2,1,0) * Z[,"X2"] * du5
    )
  )  
}
```

For this simulation design, additional estimators are available. In the following chunk, the function `psi_CC` defines the moment function for the complete case estimator.

```{r}
psi_CC <- function(theta,Z){
  
  n <- nrow(Z)
  
  # Load the parameters
  rho = theta[1]
  beta1 = theta[2]
  beta2 = theta[3]
  
  # Compute the differenced residuals
  du6 = Z[,"dY6"] - rho*Z[,"dY5"] - beta1*Z[,"dX6"] - beta2*Z[,"dX5"]
  du5 = Z[,"dY5"] - rho*Z[,"dY4"] - beta1*Z[,"dX5"] - beta2*Z[,"dX4"]
  du4 = Z[,"dY4"] - rho*Z[,"dY3"] - beta1*Z[,"dX4"] - beta2*Z[,"dX3"]
  
  return( 
    cbind(
      ## FOR COHORT == 0: FULL INCOMPLETE DATA
      ifelse(Z[,"cohort"] == 0,1,0) * Z[,"Y3"] * du6,
      ifelse(Z[,"cohort"] == 0,1,0) * Z[,"Y2"] * du6,
      ifelse(Z[,"cohort"] == 0,1,0) * Z[,"Y1"] * du6,
      ifelse(Z[,"cohort"] == 0,1,0) * Z[,"X3"] * du6,
      ifelse(Z[,"cohort"] == 0,1,0) * Z[,"X2"] * du6,
      ifelse(Z[,"cohort"] == 0,1,0) * Z[,"X1"] * du6,
      
      ifelse(Z[,"cohort"] == 0,1,0) * Z[,"Y2"] * du5,
      ifelse(Z[,"cohort"] == 0,1,0) * Z[,"Y1"] * du5,
      ifelse(Z[,"cohort"] == 0,1,0) * Z[,"X2"] * du5,
      ifelse(Z[,"cohort"] == 0,1,0) * Z[,"X1"] * du5,
      
      ifelse(Z[,"cohort"] == 0,1,0) * Z[,"Y1"] * du4,
      ifelse(Z[,"cohort"] == 0,1,0) * Z[,"X1"] * du4
    )
  )  
}
```

The following moment function `psi_CC1` is for an estimator that uses only information from cohorts 1 (and `psi_CC2` for cohort 2).

```{r}
psi_CC1 <- function(theta,Z){
  
  n <- nrow(Z)
  
  # Load the parameters
  rho = theta[1]
  beta1 = theta[2]
  beta2 = theta[3]
  
  # Compute the differenced residuals
  du6 = Z[,"dY6"] - rho*Z[,"dY5"] - beta1*Z[,"dX6"] - beta2*Z[,"dX5"]
  du5 = Z[,"dY5"] - rho*Z[,"dY4"] - beta1*Z[,"dX5"] - beta2*Z[,"dX4"]
  du4 = Z[,"dY4"] - rho*Z[,"dY3"] - beta1*Z[,"dX4"] - beta2*Z[,"dX3"]
  
  return( 
    cbind(
      ## FOR COHORT == 1: MISS PERIOD 6
      ifelse(Z[,"cohort"] == 1,1,0) * Z[,"Y2"] * du5,
      ifelse(Z[,"cohort"] == 1,1,0) * Z[,"Y1"] * du5,
      ifelse(Z[,"cohort"] == 1,1,0) * Z[,"X2"] * du5,
      ifelse(Z[,"cohort"] == 1,1,0) * Z[,"X1"] * du5,
      
      ifelse(Z[,"cohort"] == 1,1,0) * Z[,"Y1"] * du4,
      ifelse(Z[,"cohort"] == 1,1,0) * Z[,"X1"] * du4
    )
  )  
}

psi_CC2 <- function(theta,Z){
  
  n <- nrow(Z)
  
  # Load the parameters
  rho = theta[1]
  beta1 = theta[2]
  beta2 = theta[3]
  
  # Compute the differenced residuals
  du6 = Z[,"dY6"] - rho*Z[,"dY5"] - beta1*Z[,"dX6"] - beta2*Z[,"dX5"]
  du5 = Z[,"dY5"] - rho*Z[,"dY4"] - beta1*Z[,"dX5"] - beta2*Z[,"dX4"]
  du4 = Z[,"dY4"] - rho*Z[,"dY3"] - beta1*Z[,"dX4"] - beta2*Z[,"dX3"]
  
  return( 
    cbind(
      ## FOR COHORT == 2: MISS PERIOD 1
      ifelse(Z[,"cohort"] == 2,1,0) * Z[,"Y3"] * du6,
      ifelse(Z[,"cohort"] == 2,1,0) * Z[,"Y2"] * du6,
      ifelse(Z[,"cohort"] == 2,1,0) * Z[,"X3"] * du6,
      ifelse(Z[,"cohort"] == 2,1,0) * Z[,"X2"] * du6,
      
      ifelse(Z[,"cohort"] == 2,1,0) * Z[,"Y2"] * du5,
      ifelse(Z[,"cohort"] == 2,1,0) * Z[,"X2"] * du5
    )
  )  
}
```

The available case estimator uses the moment functions `psi_AC`, defined in the following code chunk.

```{r}
psi_AC <- function(theta,Z){
  
  n <- nrow(Z)
  
  # Load the parameters
  rho = theta[1]
  beta1 = theta[2]
  beta2 = theta[3]
  
  # Compute the differenced residuals
  du6 = Z[,"dY6"] - rho*Z[,"dY5"] - beta1*Z[,"dX6"] - beta2*Z[,"dX5"]
  du5 = Z[,"dY5"] - rho*Z[,"dY4"] - beta1*Z[,"dX5"] - beta2*Z[,"dX4"]
  du4 = Z[,"dY4"] - rho*Z[,"dY3"] - beta1*Z[,"dX4"] - beta2*Z[,"dX3"]
  
  # Recall:
  #  COHORT == 0: FULL INCOMPLETE DATA
  #  COHORT == 1: lacks period 6, has 1 through 5
  #  COHORT == 2: lacks period 1, has 2 through 6
  
  return( 
    cbind(
      
      ifelse(Z[,"cohort"] %in% c(0,2),1,0) * Z[,"Y3"] * du6,
      ifelse(Z[,"cohort"] %in% c(0,2),1,0) * Z[,"Y2"] * du6,
      ifelse(Z[,"cohort"] == 0,1,0) * Z[,"Y1"] * du6,
      ifelse(Z[,"cohort"] %in% c(0,2),1,0) * Z[,"X3"] * du6,
      ifelse(Z[,"cohort"] %in% c(0,2),1,0) * Z[,"X2"] * du6,
      ifelse(Z[,"cohort"] == 0,1,0) * Z[,"X1"] * du6,
      
      ifelse(Z[,"cohort"] %in% c(0,1,2),1,0) * Z[,"Y2"] * du5,
      ifelse(Z[,"cohort"] %in% c(0,1),1,0)   * Z[,"Y1"] * du5,
      ifelse(Z[,"cohort"] %in% c(0,1,2),1,0) * Z[,"X2"] * du5,
      ifelse(Z[,"cohort"] %in% c(0,1),1,0)   * Z[,"X1"] * du5,
      
      ifelse(Z[,"cohort"] %in% c(0,1),1,0) * Z[,"Y1"] * du4,
      ifelse(Z[,"cohort"] %in% c(0,1),1,0) * Z[,"X1"] * du4
    )
  )  
}
```
 
## DGP for simulation 
 
Set up the DGP in a way that `X`-distribution changes over time, and apply all the 6 estimators implicitly defined in the previous section.

```{r}
n <- 200

cohort0 <- 1:n %>% map_df(DGP_dynamic_i, T = 6, burn_in = 1,
                          rho = 0.9, beta1 = 0.5, beta2 = 0.2,
                          gamma0 = 1, gamma1 = 0.2, gamma2 = 0.2, tau = 0.2,sigma_v = 0.1) %>% 
  mutate(C = 0)

cohort1 <- 1:n %>% map_df(DGP_dynamic_i, T = 6, burn_in = 1,
                          rho = 0.9, beta1 = 0.5, beta2 = 0.2,
                          gamma0 = 2, gamma1 = 0.3, gamma2 = 0.3, tau = 0.3,sigma_v = 0.1) %>% 
  mutate(
    C = 1,
    i = i + n) # guarantees that ids are unique

cohort2 <- 1:n %>% map_df(DGP_dynamic_i, T = 6, burn_in = 1,
                          rho = 0.9, beta1 = 0.5, beta2 = 0.2,
                          gamma0 = 2, gamma1 = 0.4, gamma2 = 0.4, tau = 0.4,sigma_v = 0.1) %>% 
  mutate(
    C = 2,
    i = i + 2*n) # guarantees that ids are unique


df_new <- rbind(cohort0,cohort1,cohort2) %>% group_by(i) %>% 
  summarize(
    
    cohort = C[1],
    
    Y1 = Y_i[1],
    Y2 = Y_i[2],
    Y3 = Y_i[3],
    X1 = X_i[1],
    X2 = X_i[2],
    X3 = X_i[3],
    
    dY6 = Y_i[6]-Y_i[5],
    dY5 = Y_i[5]-Y_i[4],
    dY4 = Y_i[4]-Y_i[3],
    dY3 = Y_i[3]-Y_i[2],
    
    dX6 = X_i[6]-X_i[5],
    dX5 = X_i[5]-X_i[4],
    dX4 = X_i[4]-X_i[3],
    dX3 = X_i[3]-X_i[2],
    dX2 = X_i[2]-X_i[1]
  )

out1 <- gmm(g = psi, x = as.matrix(df_new), t0 = c(0.9,0.5,0.2))
out2 <- gmm(g = psiJ, x = as.matrix(df_new), t0 = c(0.9,0.5,0.2))
out3 <- gmm(g = psi_CC, x = as.matrix(df_new), t0 = c(0.9,0.5,0.2))
out4 <- gmm(g = psi_AC, x = as.matrix(df_new), t0 = c(0.9,0.5,0.2))
out5 <- gmm(g = psi_CC1, x = as.matrix(df_new), t0 = c(0.9,0.5,0.2))
out6 <- gmm(g = psi_CC2, x = as.matrix(df_new), t0 = c(0.9,0.5,0.2))
cbind(out1$coefficients,out2$coefficients,out3$coefficients,out4$coefficients,out5$coefficients,out6$coefficients)
```

It looks like things are working. The next section takes the next step of turning this experiment into a simulation study.

## Simulation functions

The following function corresponds to one simulation run. It is followed by a mock simulation study with a focus on `rho`.

```{r}
one_sim <- function(s, n = 100, rho_0 = 0.9, Delta = 0.1) {
  
  cohort0 <- 1:n %>% map_df(DGP_dynamic_i, T = 6, burn_in = 1,
                            rho = rho_0, beta1 = 0.5, beta2 = 0.2,
                            gamma0 = 1, gamma1 = 0.1, gamma2 = 0.1, tau = 0.1,sigma_v = 0.1) %>% 
    mutate(C = 0)
  
  cohort1 <- 1:n %>% map_df(DGP_dynamic_i, T = 6, burn_in = 1,
                            rho = rho_0, beta1 = 0.5, beta2 = 0.2,
                            gamma0 = 1, gamma1 = 0.1+Delta, gamma2 = 0.1+Delta, tau = 0.1+Delta,sigma_v = 0.1) %>% 
    mutate(
      C = 1,
      i = i + n) 
  
  cohort2 <- 1:n %>% map_df(DGP_dynamic_i, T = 6, burn_in = 1,
                            rho = rho_0, beta1 = 0.5, beta2 = 0.2,
                            gamma0 = 1, gamma1 = 0.1+2*Delta, gamma2 = 0.1+2*Delta, tau = 0.1+2*Delta,sigma_v = 0.1) %>% 
    mutate(
      C = 2,
      i = i + 2*n) 
  
  
  df_new <- rbind(cohort0,cohort1,cohort2) %>% group_by(i) %>% 
    summarize(
      
      cohort = C[1],
      
      Y1 = Y_i[1],
      Y2 = Y_i[2],
      Y3 = Y_i[3],
      X1 = X_i[1],
      X2 = X_i[2],
      X3 = X_i[3],
      
      dY6 = Y_i[6]-Y_i[5],
      dY5 = Y_i[5]-Y_i[4],
      dY4 = Y_i[4]-Y_i[3],
      dY3 = Y_i[3]-Y_i[2],
      
      dX6 = X_i[6]-X_i[5],
      dX5 = X_i[5]-X_i[4],
      dX4 = X_i[4]-X_i[3],
      dX3 = X_i[3]-X_i[2],
      dX2 = X_i[2]-X_i[1]
    )
  
  out1 <- gmm(g = psi, x = as.matrix(df_new), t0 = c(rho_0,0.5,0.2))
  out2 <- gmm(g = psiJ, x = as.matrix(df_new), t0 = c(rho_0,0.5,0.2))
  out3 <- gmm(g = psi_CC, x = as.matrix(df_new), t0 = c(rho_0,0.5,0.2))
  out4 <- gmm(g = psi_AC, x = as.matrix(df_new), t0 = c(rho_0,0.5,0.2))
  out5 <- gmm(g = psi_CC1, x = as.matrix(df_new), t0 = c(rho_0,0.5,0.2))
  out6 <- gmm(g = psi_CC2, x = as.matrix(df_new), t0 = c(rho_0,0.5,0.2))
  
  return(
    tibble(
      s = rep(s,6),
      
      rho_0 = rho_0,
      Delta = Delta,
      
      rho = c(out1$coefficients[1],out2$coefficients[1],
              out3$coefficients[1],out4$coefficients[1],
              out5$coefficients[1],out6$coefficients[1]),
      
      method = c("full","efficient","complete case","available case","cohort 1","cohort 2")
    )
  )
  
}

result <- 1:10 %>% map_df(one_sim)
result
```

The next chunk implements the actual simulation study, by repeating the procedure above, but for larger `S` and across different values of `rho` as well as `D_D_X`.

```{r}
SS <- 1000
nn <- 100

ss_07_05 <- 1:SS %>% map_df(one_sim, n = nn, rho_0 = 0.7, Delta = 0.05)
ss_08_05 <- 1:SS %>% map_df(one_sim, n = nn, rho_0 = 0.8, Delta = 0.05)
ss_09_05 <- 1:SS %>% map_df(one_sim, n = nn, rho_0 = 0.9, Delta = 0.05)

ss_07_15 <- 1:SS %>% map_df(one_sim, n = nn, rho_0 = 0.7, Delta = 0.15)
ss_08_15 <- 1:SS %>% map_df(one_sim, n = nn, rho_0 = 0.8, Delta = 0.15)
ss_09_15 <- 1:SS %>% map_df(one_sim, n = nn, rho_0 = 0.9, Delta = 0.15)


result_design2 <- rbind(
  ss_07_05,
  ss_08_05,
  ss_09_05,
  ss_07_15,
  ss_08_15,
  ss_09_15
)

result_design2 %>% ggplot() + geom_boxplot(aes(x = method, y = rho)) + facet_grid(rho_0~.)
```

Look at the RMSEs - this yields Panels 1, 2, and 3 in Table 7.

```{r}
result_design2 %>% group_by(rho_0,Delta,method) %>% 
  summarize(rmse = sqrt(mean((rho-rho_0)^2)),
            bias = mean(rho-rho_0),
            se = sd(rho)) %>% arrange(rmse)
```

Add one more design with larger `nn` (this corresponds to panel (4) in Table 7)

```{r}
ss_09_05_200 <- 1:SS %>% map_df(one_sim, n = 200, rho_0 = 0.9, Delta = 0.05)
```

Produce stats for this last one.

```{r}
ss_09_05_200 %>% group_by(rho_0,method) %>% 
  summarize(rmse = sqrt(mean((rho-rho_0)^2)),
            bias = mean(rho-rho_0),
            se = sd(rho)) %>% arrange(rmse)
```

Save the results for future reference.

```{r}
save.image("dpd_cohorts_design2.RData")
```

